import Levenshtein
import pandas as pd

class RefClassifier:
    """Group reference strings by similarity and count each group's members.

    Every group is stored as a mutable ``[representative_text, count]`` pair
    in ``self._classified_ref_list``; the representative is the first text
    that opened the group.
    """

    def __init__(self, similarity_threshold: float):
        """Create an empty classifier.

        Args:
            similarity_threshold: Value a similarity score must strictly
                exceed for two texts to be placed in the same group.
        """
        self._classified_ref_list = []
        self._similarity_threshold = similarity_threshold

    def ref_classify(self, text: str):
        """Add one text to the first matching group, or open a new group.

        Groups are scanned in insertion order and the scan stops at the
        first representative that ``_compare`` accepts.

        Args:
            text: The reference string to classify.
        """
        # Lazily locate the first group whose representative matches.
        matching_entry = next(
            (
                entry
                for entry in self._classified_ref_list
                if self._compare(text, entry[0])
            ),
            None,
        )

        if matching_entry is None:
            # Nothing similar enough yet: this text starts its own group.
            self._classified_ref_list.append([text, 1])
        else:
            matching_entry[1] += 1

    def refs_classify(self, texts: list):
        """Classify each text in ``texts``, in order, via ``ref_classify``.

        Args:
            texts: Iterable of reference strings.
        """
        for item in texts:
            self.ref_classify(item)

    def export(self, filepath):
        """Write the groups, followed by a "total:" row, to an Excel file.

        The sheet has ``name`` and ``count`` columns; the final row holds the
        sum of all counts.

        Args:
            filepath: Destination handed to ``DataFrame.to_excel``.
        """
        columns = ['name', 'count']
        groups = pd.DataFrame(self._classified_ref_list, columns=columns)
        total_row = pd.DataFrame(
            [["total:", groups['count'].sum()]],
            columns=columns,
        )
        report = pd.concat([groups, total_row], ignore_index=True)
        report.to_excel(filepath)

    def _compare(self, str1, str2):
        """Report whether two strings are similar enough to share a group.

        Args:
            str1: First string.
            str2: Second string.

        Returns:
            True when ``Levenshtein.ratio(str1, str2)`` is strictly greater
            than the configured threshold. A threshold of 1 or more always
            yields False, and the ratio is not computed in that case.
        """
        threshold = self._similarity_threshold

        # Guard clause: thresholds of 1 or more disable matching entirely.
        if threshold >= 1:
            return False

        return Levenshtein.ratio(str1, str2) > threshold